Introduction

This note project folder demonstrate the basics of spatial data analysis with R.

Required Package

library(tidyverse)
library(ggthemes)
theme_set(theme_map())

library(sf)
sf_use_s2(FALSE)
library(rnaturalearth)
library(rnaturalearthdata)

Load the World Map

Introducing a new format of geo-referenced data.

world = ne_countries(scale = "medium", type = "map_units", returnclass = "sf")
world |> ggplot() + geom_sf()

names(world)
##  [1] "scalerank"  "featurecla" "labelrank"  "sovereignt" "sov_a3"    
##  [6] "adm0_dif"   "level"      "type"       "admin"      "adm0_a3"   
## [11] "geou_dif"   "geounit"    "gu_a3"      "su_dif"     "subunit"   
## [16] "su_a3"      "brk_diff"   "name"       "name_long"  "brk_a3"    
## [21] "brk_name"   "brk_group"  "abbrev"     "postal"     "formal_en" 
## [26] "formal_fr"  "note_adm0"  "note_brk"   "name_sort"  "name_alt"  
## [31] "mapcolor7"  "mapcolor8"  "mapcolor9"  "mapcolor13" "pop_est"   
## [36] "gdp_md_est" "pop_year"   "lastcensus" "gdp_year"   "economy"   
## [41] "income_grp" "wikipedia"  "fips_10"    "iso_a2"     "iso_a3"    
## [46] "iso_n3"     "un_a3"      "wb_a2"      "wb_a3"      "woe_id"    
## [51] "adm0_a3_is" "adm0_a3_us" "adm0_a3_un" "adm0_a3_wb" "continent" 
## [56] "region_un"  "subregion"  "region_wb"  "name_len"   "long_len"  
## [61] "abbrev_len" "tiny"       "homepart"   "geometry"

Download Conflict Dataset

download.file(
  "https://ucdp.uu.se/downloads/ucdpprio/ucdp-prio-acd-221-csv.zip",
  destfile = "data/ucdp-prio-acd-221-csv.zip")

unzip("Lec_09/data/ucdp-prio-acd-221-csv.zip", exdir = "data")

# For more information: https://ucdp.uu.se/downloads/index.html#armedconflict

Load Conflict Dataset

d = read_csv("Lec_09/data/ucdp-prio-acd-221.csv")

Extract Entities of Interest

Extract country names and find the ISO3c codes using the package countrycode. This is a “lazy” approach. The most robust approach will be a locally customized dataset of country names.

library(countrycode)

# Take a look at the data
# Variables of interest: side_a, side_b

d |>
  select(year, side_a, side_b)
## # A tibble: 2,568 × 3
##     year side_a                    side_b                             
##    <dbl> <chr>                     <chr>                              
##  1  2012 Government of India       GNLA                               
##  2  2014 Government of India       GNLA                               
##  3  1967 Government of Egypt       Government of Israel               
##  4  1969 Government of Egypt       Government of Israel               
##  5  1970 Government of Egypt       Government of Israel               
##  6  1973 Government of Egypt       Government of Israel               
##  7  2011 Government of Sudan       Republic of South Sudan            
##  8  2011 Government of South Sudan SSDM/A, SSLM/A                     
##  9  2012 Government of South Sudan SSLM/A                             
## 10  2013 Government of South Sudan SPLM/A - IO, SSDM/A - Cobra Faction
## # ℹ 2,558 more rows
d_t = d |>
  mutate(
    side_a_country = countrycode(side_a, "country.name", "country.name"),
    side_a_iso3c = countrycode(side_a, "country.name", "iso3c"),
    .after = "side_a"
  ) |>
  mutate(
    side_b_country = countrycode(side_b, "country.name", "country.name"),
    side_b_iso3c = countrycode(side_b, "country.name", "iso3c"),
    .after = "side_b"
  )

# Need to check manually if the conversion is successful.
d_t |>
  select(year, side_a, side_a_country, side_a_iso3c, side_b, side_b_country, side_b_iso3c)
## # A tibble: 2,568 × 7
##     year side_a   side_a_country side_a_iso3c side_b side_b_country side_b_iso3c
##    <dbl> <chr>    <chr>          <chr>        <chr>  <chr>          <chr>       
##  1  2012 Governm… India          IND          GNLA   <NA>           <NA>        
##  2  2014 Governm… India          IND          GNLA   <NA>           <NA>        
##  3  1967 Governm… Egypt          EGY          Gover… Israel         ISR         
##  4  1969 Governm… Egypt          EGY          Gover… Israel         ISR         
##  5  1970 Governm… Egypt          EGY          Gover… Israel         ISR         
##  6  1973 Governm… Egypt          EGY          Gover… Israel         ISR         
##  7  2011 Governm… Sudan          SDN          Repub… South Sudan    SSD         
##  8  2011 Governm… South Sudan    SSD          SSDM/… <NA>           <NA>        
##  9  2012 Governm… South Sudan    SSD          SSLM/A <NA>           <NA>        
## 10  2013 Governm… South Sudan    SSD          SPLM/… <NA>           <NA>        
## # ℹ 2,558 more rows
# It looks already. Then filter only the country-country conflicts
d_t_s = d_t |>
  filter(!is.na(side_a_country) & !is.na(side_b_country))

# OK. I don't think this dataset is entirely clean yet. 
# If this is a real research project, you need to do more manual check.
# But for demonstration purpose, let's use it as it is.

Get the centroids of each country

To draw the network, we need points representing countries’ locations). Using the map we already halv, we find the geographic “centroids” for each country using functions from the sf package.

# Calculate the countries' "centroids"
country_centroid = st_centroid(world$geometry) 

# Convert the centroids to a dataframe
country_centroid_df = country_centroid |> 
  st_coordinates() |>
  as_tibble() |>
  rename("centroid_xcoord" = "X", "centroid_ycoord" = "Y")

world_t = world |>
  bind_cols(country_centroid_df)

To check whether the geogrphic centroids are correctly specified we plot a world map along with the points of centroids. Check if the maps make sense and what strike you as interesting/ unintuitive.

world_t |>
  ggplot() +
  geom_sf() +
  geom_point(aes(x = centroid_xcoord, y = centroid_ycoord))

Merge the world map with the network data

Now we have the locations of each country (using their geographic centroids). Merge them with the conflict dataset

# Get a dataset with only countries' identifiers and locations of their centroids
world_t_centroid = world_t |>
  as_tibble() |>
  select(iso_a3, centroid_xcoord, centroid_ycoord) |>
  filter(!is.na(iso_a3)) # There are few entries with no country identifiers. We do not need them.


d_t_s_2 = d_t_s |>
  left_join(world_t_centroid, by = c("side_a_iso3c" = "iso_a3")) |>
  rename("centroid_xcoord_a" = "centroid_xcoord", "centroid_ycoord_a" = "centroid_ycoord") |>
  left_join(world_t_centroid, by = c("side_b_iso3c"= "iso_a3")) |>
  rename("centroid_xcoord_b" = "centroid_xcoord", "centroid_ycoord_b" = "centroid_ycoord")


names(d_t_s_2)
##  [1] "conflict_id"          "location"             "side_a"              
##  [4] "side_a_country"       "side_a_iso3c"         "side_a_id"           
##  [7] "side_a_2nd"           "side_b"               "side_b_country"      
## [10] "side_b_iso3c"         "side_b_id"            "side_b_2nd"          
## [13] "incompatibility"      "territory_name"       "year"                
## [16] "intensity_level"      "cumulative_intensity" "type_of_conflict"    
## [19] "start_date"           "start_prec"           "start_date2"         
## [22] "start_prec2"          "ep_end"               "ep_end_date"         
## [25] "ep_end_prec"          "gwno_a"               "gwno_a_2nd"          
## [28] "gwno_b"               "gwno_b_2nd"           "gwno_loc"            
## [31] "region"               "version"              "centroid_xcoord_a"   
## [34] "centroid_ycoord_a"    "centroid_xcoord_b"    "centroid_ycoord_b"

Finally, we can plot the map

# ggplot() +
#   geom_sf(data = world_t) +
#   geom_point(data = d_t_s_2, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
#   geom_point(data = d_t_s_2, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
#   geom_curve(data = d_t_s_2, aes(x = centroid_xcoord_a, y = centroid_ycoord_a,
#                                  xend = centroid_xcoord_a, yend = centroid_ycoord_b))
# ERROR above!

# Remove countries to self
d_t_s_3 = d_t_s_2 |>
  filter(!(side_a_iso3c == side_b_iso3c))

ggplot() +
  geom_sf(data = world_t) +
  geom_point(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
  geom_point(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
  geom_curve(data = d_t_s_3, aes(x = centroid_xcoord_a, 
                                 y = centroid_ycoord_a,
                                 xend = centroid_xcoord_b, 
                                 yend = centroid_ycoord_b), 
             curvature = 0.2, color = "red", alpah = 0.5)

ggplot() +
  geom_sf(data = world_t) +
  geom_text(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a, label = side_a_iso3c)) +
  geom_text(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b, label = side_b_iso3c)) +
  geom_curve(data = d_t_s_3, aes(x = centroid_xcoord_a, 
                                 y = centroid_ycoord_a,
                                 xend = centroid_xcoord_b, 
                                 yend = centroid_ycoord_b), curvature = 0.2, color = "red", alpah = 0.5)

Advanced figure, considering the number of conflicts

d_t_s_4 = d_t_s_3 |>
  group_by(side_a_iso3c, side_b_iso3c, 
           centroid_xcoord_a, centroid_ycoord_a, 
           centroid_xcoord_b, centroid_ycoord_b) |>
  count()

ggplot() +
  geom_sf(data = world_t) +
  geom_point(data = d_t_s_4, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
  geom_point(data = d_t_s_4, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
  geom_curve(data = d_t_s_4, aes(x = centroid_xcoord_a, 
                                 y = centroid_ycoord_a,
                                 xend = centroid_xcoord_b, 
                                 yend = centroid_ycoord_b,
                                 linewidth = n), 
             curvature = 0.2, color = "red", alpha = 0.5)

ggplot() +
  geom_sf(data = world_t) +
  geom_text(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a, label = side_a_iso3c)) +
  geom_text(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b, label = side_b_iso3c)) +
  geom_curve(data = d_t_s_4, aes(x = centroid_xcoord_a, 
                                 y = centroid_ycoord_a,
                                 xend = centroid_xcoord_b, 
                                 yend = centroid_ycoord_b,
                                 linewidth = n), 
             curvature = 0.2, color = "red", alpha = 0.5)

# Q: Why does the text look weird?